/*
 * cl_source_bfs_iiit: OpenCL C source text for the BFS_kernel_warp kernel,
 * held as one C string (the adjacent literals below are concatenated by the
 * C compiler).  Presumably handed to the OpenCL runtime for compilation by
 * host code elsewhere -- TODO confirm against the caller.
 *
 * What the embedded kernel does, per work-item:
 *   - tid = get_global_id(0).  W_ID = tid / W_SZ selects a group of W_SZ
 *     consecutive work-items and W_OFF = tid % W_SZ is the lane inside that
 *     group.  W_SZ is used as a divisor, so it must be nonzero.
 *   - Every lane of group W_ID walks the same vertices, starting at
 *     v1 = W_ID * CHUNK_SZ and stopping before v1 + CHUNK_SZ or before
 *     numVertices, whichever comes first.
 *   - For a vertex whose level equals curr, the lanes split its neighbor
 *     list: lane W_OFF visits neighbors W_OFF, W_OFF + W_SZ, ...  A neighbor
 *     whose level is UINT_MAX gets level curr + 1 and *flag is set to 1.
 *
 * Notes for maintainers (all visible in the text below):
 *   - The four pragmas enable the int32 atomic extensions, but no atomic
 *     built-in is called; the stores to levels[] and *flag are ordinary
 *     assignments.
 *   - The kernel only ever writes 1 to *flag and never clears it.
 *     Presumably the host resets it before each launch -- TODO confirm.
 *   - edgeArray[v + 1] is read for every frontier vertex, so index
 *     numVertices is accessed when the last vertex is on the frontier.
 *   - Inside the neighbor loop a new int v is declared that shadows the
 *     vertex loop variable v; within that loop v names the neighbor.
 *
 * The string content is runtime data: editing any literal changes the
 * program that gets compiled for the device.
 */
const char *cl_source_bfs_iiit =
"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n"
"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics: enable\n"
"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics: enable\n"
"\n"
"\n"
"//Sungpack Hong, Sang Kyun Kim, Tayo Oguntebi, and Kunle Olukotun. 2011. \n"
"//Accelerating CUDA graph algorithms at maximum warp. \n"
"//In Proceedings of the 16th ACM symposium on Principles and practice of \n"
"//parallel programming (PPoPP '11). ACM, New York, NY, USA, 267-276. \n"
"// ****************************************************************************\n"
"// Function: BFS_kernel_warp\n"
"//\n"
"// Purpose:\n"
"//   Perform BFS on the given graph\n"
"//\n"
"// Arguments:\n"
"//   levels: array that stores the level of vertices \n"
"//   edgeArray: array that gives offset of a vertex in edgeArrayAux\n"
"//   edgeArrayAux: array that gives the edge list of a vertex \n"
"//   W_SZ: the warp size to use to process vertices\n"
"//   CHUNK_SZ: the number of vertices each warp processes\n"
"//   numVertices: number of vertices in the given graph \n"
"//   curr: the current BFS level \n"
"//   flag: set when more vertices remain to be traversed\n"
"//\n"
"// Returns:  nothing\n"
"//\n"
"// Programmer: Aditya Sarwade\n"
"// Creation: June 16, 2011\n"
"//\n"
"// Modifications:\n"
"//\n"
"// ****************************************************************************\n"
"__kernel void BFS_kernel_warp(\n"
"        __global unsigned int *levels,\n"
"        __global unsigned int *edgeArray,\n"
"        __global unsigned int *edgeArrayAux,\n"
"        int W_SZ,\n"
"        int CHUNK_SZ,\n"
"        unsigned int numVertices,\n"
"        int curr,\n"
"        __global int *flag)\n"
"{\n"
"    \n"
"    int tid = get_global_id(0);\n"
"    int W_OFF = tid % W_SZ;\n"
"    int W_ID = tid / W_SZ; \n"
"    int v1= W_ID * CHUNK_SZ;\n"
/* chk_sz holds (vertex count + 1); the vertex loop below subtracts the 1 again. */
"    int chk_sz=CHUNK_SZ+1;\n"
"    \n"
/*
 * Last (or past-the-end) chunk: shrink the count so the vertex loop stops
 * before numVertices.  numVertices - v1 + 1 is computed in unsigned
 * arithmetic and then stored in an int; the chk_sz < 0 test catches the
 * case where that stored value comes out negative.
 */
"    if((v1+CHUNK_SZ)>=numVertices) \n"
"    {\n"
"        chk_sz =  numVertices-v1+1;//(v1+CHUNK_SZ) - numVertices;\n"
"        if(chk_sz<0)\n"
"            chk_sz=0;\n"
"    }\n"
"\n"
"    //each warp processes nodes one by one\n"
"    for(int v=v1; v< chk_sz-1+v1; v++)\n"
"    {\n"
"        if(levels[v] == curr)\n"
"        {\n"
"            unsigned int num_nbr = edgeArray[v+1]-edgeArray[v];\n"
"            unsigned int nbr_off = edgeArray[v];\n"
/*
 * Lanes of the group stride over the neighbor list in steps of W_SZ.
 * The int v declared inside this loop shadows the outer v and names the
 * neighbor vertex.
 */
"            for(int i=W_OFF; i<num_nbr; i+=W_SZ)\n"
"            {\n"
"               int v = edgeArrayAux[i + nbr_off];\n"
"               if(levels[v]==UINT_MAX)\n"
"               {\n"
"                    levels[v] = curr + 1;\n"
"                    *flag = 1;\n"
"               } \n"
"            }\n"
"        }\n"
"    }\n"
"}\n"
;
